Stock Market Prediction

In [1]:
import math,random
import os
import warnings

import numpy as np
import pandas as pd
import plotly.express as px
import quandl
from sklearn import preprocessing
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor,StackingRegressor,VotingRegressor
from sklearn.linear_model import LinearRegression,SGDRegressor,BayesianRidge,ARDRegression,PassiveAggressiveRegressor,TheilSenRegressor
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
warnings.filterwarnings('ignore')
In [2]:
# Ticker used for the single-stock walkthrough cells.
stock = 'MSFT'
# Number of trailing rows held out as the forecast window. PreprocessData also
# derives the label look-ahead from it: ceil(0.12 * daysToForecast) rows.
daysToForecast = 1000
In [3]:
def getStockData(stock):
    """Download the full price history for one ticker from Quandl's WIKI dataset.

    Parameters
    ----------
    stock : str
        Ticker symbol; it is appended to the ``'WIKI/'`` dataset prefix.

    Returns
    -------
    Whatever ``quandl.get`` returns for ``'WIKI/<stock>'``. The rest of the
    notebook treats it as a DataFrame with ``'Adj. *'`` columns.

    Notes
    -----
    The API key is taken from the ``QUANDL_API_KEY`` environment variable when
    it is set and non-empty, so the credential no longer has to live in the
    notebook.
    """
    # NOTE(review): the fallback below is a credential committed to source.
    # Rotate it and delete the fallback once QUANDL_API_KEY is configured
    # everywhere this notebook runs.
    quandl.ApiConfig.api_key = os.environ.get("QUANDL_API_KEY") or "qWcicxSctVxrP9PhyneG"
    return quandl.get('WIKI/' + stock)
In [4]:
def FormatDataForModel(dataArray):
    """Build the feature table used by the models from raw adjusted price data.

    Parameters
    ----------
    dataArray : pandas.DataFrame
        Must contain the columns ``'Adj. Open'``, ``'Adj. High'``,
        ``'Adj. Low'``, ``'Adj. Close'`` and ``'Adj. Volume'``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``'Adj. Close'``, ``'HL_PCT'``,
        ``'PCT_change'`` and ``'Adj. Volume'`` (in that order), with every
        missing value replaced by ``-99999``. The input frame is not modified.
    """
    # Explicit copy: the derived columns are written to an independent frame
    # instead of to a slice of the caller's data (avoids SettingWithCopy issues).
    prices = dataArray[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']].copy()
    # Distance of the daily high above the close, as a percentage of the close.
    prices['HL_PCT'] = (prices['Adj. High'] - prices['Adj. Close']) / prices['Adj. Close'] * 100.0
    # Open-to-close move, as a percentage of the open.
    prices['PCT_change'] = (prices['Adj. Close'] - prices['Adj. Open']) / prices['Adj. Open'] * 100.0
    features = prices[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']]
    # -99999 is the sentinel this notebook uses for missing values.
    return features.fillna(-99999)
In [5]:
def PreprocessData(mlData,daysToForecast):
    """Create labels, scale the features and split off the forecast window.

    Parameters
    ----------
    mlData : pandas.DataFrame
        Feature table containing an ``'Adj. Close'`` column. It is not
        modified; labelling happens on a copy.
    daysToForecast : int
        Number of trailing rows reserved as the forecast window.

    Returns
    -------
    list
        ``[X, y, X_data, forecastData]`` where

        * ``X`` - scaled features of all rows except the last ``daysToForecast``
        * ``y`` - labels for those same rows
        * ``X_data`` - scaled features of the last ``daysToForecast`` rows
        * ``forecastData`` - the last ``daysToForecast`` rows of the labelled
          frame (original columns plus ``'label'``)
    """
    forecast_col = 'Adj. Close'
    # The label is the close this many rows ahead (12% of the forecast window).
    forecast_out = int(math.ceil(0.12*daysToForecast))

    # Copy so the caller's frame does not silently gain a 'label' column.
    labelled = mlData.copy()
    labelled['label'] = labelled[forecast_col].shift(-forecast_out)
    # The shift leaves NaN labels in the last `forecast_out` rows. For a
    # positive daysToForecast, forecast_out <= daysToForecast, so those rows
    # fall inside the forecast window and never reach `y`.

    # `columns=` keyword: passing the axis positionally (drop(['label'], 1))
    # raises TypeError on pandas >= 2.0.
    features = np.array(labelled.drop(columns=['label']))
    # NOTE(review): scaling uses statistics from all rows, including the
    # forecast window - confirm that this is intended.
    features = preprocessing.scale(features)

    X_data = features[-daysToForecast:]
    X = features[:-daysToForecast]
    forecastData = labelled[-daysToForecast:]
    trainData = labelled[:-daysToForecast]
    y = np.array(trainData['label'])
    return [X, y, X_data, forecastData]
In [6]:
def TrainAndPredict(model,X,y,X_data,test_size=0.2,random_state=None):
    """Fit ``model`` on a random train split, score it, and predict the forecast rows.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit``, ``score`` and ``predict``; it is fitted in place.
    X, y : array-like
        Training features and labels.
    X_data : array-like
        Features of the rows to predict.
    test_size : float, default 0.2
        Fraction of ``X``/``y`` held out for scoring.
    random_state : int or None, default None
        Seed forwarded to ``train_test_split``. The default keeps the original
        unseeded split; pass an integer to make the score reproducible and
        comparable across models.

    Returns
    -------
    tuple
        ``(accuracy, prediction)``: the value of ``model.score`` on the held-out
        split (R^2 for scikit-learn regressors) and ``model.predict(X_data)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model.fit(X_train, y_train)
    accuracy = model.score(X_test, y_test)
    prediction = model.predict(X_data)
    return accuracy, prediction
In [7]:
def addPredictionToForecast(prediction,forecastData):
    """Pair the actual close of the forecast rows with the model's predictions.

    Returns a new frame with two columns: ``'EOD'`` (the ``'Adj. Close'``
    values of ``forecastData``) and ``'prediction'`` (a copy of ``prediction``).
    """
    eod_only = forecastData[['Adj. Close']].rename(columns={'Adj. Close': 'EOD'})
    return eod_only.assign(prediction=prediction[:])
In [8]:
def GraphPredictions(forecastData,stock):
    """Show a line chart of ``forecastData``, titled with ``stock``."""
    axis_titles = {'xaxis_title': 'Time', 'yaxis_title': 'Price'}
    chart = px.line(forecastData)
    chart.update_layout(title=stock, **axis_titles)
    chart.show()
In [9]:
def GraphAllData(allData,forecastData,stock):
    """Show the full ``'Adj. Close'`` history together with the predictions.

    The two series are joined column-wise into one frame and drawn as a single
    line chart titled with ``stock``.
    """
    series_to_plot = [allData['Adj. Close'], forecastData['prediction']]
    combined = pd.concat(series_to_plot, axis=1, sort=False)
    axis_titles = {'xaxis_title': 'Time', 'yaxis_title': 'Price'}
    chart = px.line(combined)
    chart.update_layout(title=stock, **axis_titles)
    chart.show()
In [10]:
# Single-stock walkthrough: download -> features -> split -> fit -> collect.
allData = getStockData(stock)
mlData = FormatDataForModel(allData)
X, y, X_data, forecastData = PreprocessData(mlData, daysToForecast)

# Baseline model for the walkthrough.
model = LinearRegression()
accuracy, prediction = TrainAndPredict(model, X, y, X_data)

# Replace the forecast slice with the two-column EOD / prediction frame.
forecastData = addPredictionToForecast(prediction, forecastData)
In [11]:
# Value of model.score on the held-out split from TrainAndPredict.
print(accuracy)
0.9266544588978678
In [12]:
# Actual close (EOD) vs. predicted value over the forecast window.
GraphPredictions(forecastData,stock)
In [13]:
# Full adjusted-close history with the predictions drawn alongside it.
GraphAllData(allData,forecastData,stock)
In [14]:
# Tickers to run the LinearRegression pipeline on.
stock_list = ['AAPL', 'IBM', 'MSFT', 'WMT','AMZN','TSLA','HP']

# The loop body must be indented under the `for`; without the indentation the
# cell fails with an IndentationError instead of running once per ticker.
for stock in stock_list:
    print("Stock: ", stock)
    allData = getStockData(stock)
    mlData = FormatDataForModel(allData)
    X,y,X_data,forecastData = PreprocessData(mlData,daysToForecast)
    model = LinearRegression()
    accuracy,prediction=TrainAndPredict(model,X,y,X_data)
    print("Accuracy: ", accuracy)
    forecastData = addPredictionToForecast(prediction,forecastData)
    GraphPredictions(forecastData,stock)
    GraphAllData(allData,forecastData,stock)
In [15]:
# Estimators to compare, each paired with the label used in printed output and
# as a row / column name in the result tables.
model_list = [
    [LinearRegression(), "LinearRegression"],
    [SVR(), "SupportVectorRegression"],
    [MLPRegressor(), "MLPRegressor"],
    [SGDRegressor(), "SGDRegressor"],
    [BayesianRidge(), "BayesianRidge"],
    [ARDRegression(), "ARDRegression"],
    [PassiveAggressiveRegressor(), "PassiveAggressiveRegressor"],
    [TheilSenRegressor(), "TheilSenRegressor"],
]
In [16]:
# model_results collects (model name, ticker, score) triples;
# stock_dfs collects (ticker, frame of actual close + one column per model).
model_results = []
stock_dfs = []

for stock in stock_list:
    print("Stock: ", stock)

    # Same preparation steps as the single-stock walkthrough.
    allData = getStockData(stock)
    mlData = FormatDataForModel(allData)
    X, y, X_data, forecastData = PreprocessData(mlData, daysToForecast)

    # Start the per-stock frame with the actual close of the forecast window.
    df_stocks = forecastData[['Adj. Close']].rename(
        columns={'Adj. Close': stock + ' Actual'}
    )

    for model, name in model_list:
        accuracy, prediction = TrainAndPredict(model, X, y, X_data)
        print("Model: ", name, "  ", "Accuracy:", accuracy)
        model_results.append((name, stock, accuracy))
        df_stocks[name] = prediction[:]

    stock_dfs.append((stock, df_stocks))
Stock:  AAPL
Model:  LinearRegression    Accuracy: 0.9416689581511002
Model:  SupportVectorRegression    Accuracy: 0.9390996693750562
Model:  MLPRegressor    Accuracy: 0.9584427769649096
Model:  SGDRegressor    Accuracy: 0.9405753204592563
Model:  BayesianRidge    Accuracy: 0.9284061175919492
Model:  ARDRegression    Accuracy: 0.9440036239170232
Model:  PassiveAggressiveRegressor    Accuracy: 0.9259926425341047
Model:  TheilSenRegressor    Accuracy: 0.9381404255669968
Stock:  IBM
Model:  LinearRegression    Accuracy: 0.9693906628320345
Model:  SupportVectorRegression    Accuracy: 0.9605639910864715
Model:  MLPRegressor    Accuracy: 0.972413093507511
Model:  SGDRegressor    Accuracy: 0.9698847774393504
Model:  BayesianRidge    Accuracy: 0.9682668418685889
Model:  ARDRegression    Accuracy: 0.9672406909102289
Model:  PassiveAggressiveRegressor    Accuracy: 0.9667874235545553
Model:  TheilSenRegressor    Accuracy: 0.9709534915458214
Stock:  MSFT
Model:  LinearRegression    Accuracy: 0.9193764282618198
Model:  SupportVectorRegression    Accuracy: 0.9075440346393898
Model:  MLPRegressor    Accuracy: 0.9313824596993524
Model:  SGDRegressor    Accuracy: 0.9168721053758682
Model:  BayesianRidge    Accuracy: 0.9166553204473452
Model:  ARDRegression    Accuracy: 0.9161877569548807
Model:  PassiveAggressiveRegressor    Accuracy: 0.8262944928878577
Model:  TheilSenRegressor    Accuracy: 0.917093515322345
Stock:  WMT
Model:  LinearRegression    Accuracy: 0.978235049346929
Model:  SupportVectorRegression    Accuracy: 0.970138840204908
Model:  MLPRegressor    Accuracy: 0.981007472049317
Model:  SGDRegressor    Accuracy: 0.9788062143260913
Model:  BayesianRidge    Accuracy: 0.9778300976543168
Model:  ARDRegression    Accuracy: 0.9780491178439437
Model:  PassiveAggressiveRegressor    Accuracy: 0.970125770607604
Model:  TheilSenRegressor    Accuracy: 0.9781513183688831
Stock:  AMZN
Model:  LinearRegression    Accuracy: 0.9250830965982197
Model:  SupportVectorRegression    Accuracy: 0.8120796747451602
Model:  MLPRegressor    Accuracy: 0.9305107247936262
Model:  SGDRegressor    Accuracy: 0.9174657916793137
Model:  BayesianRidge    Accuracy: 0.9199857857638271
Model:  ARDRegression    Accuracy: 0.9188239686505218
Model:  PassiveAggressiveRegressor    Accuracy: 0.9203541474704149
Model:  TheilSenRegressor    Accuracy: 0.9230552787704296
Stock:  TSLA
Model:  LinearRegression    Accuracy: 0.8262625839544878
Model:  SupportVectorRegression    Accuracy: 0.49754170962248667
Model:  MLPRegressor    Accuracy: 0.37878951319576615
Model:  SGDRegressor    Accuracy: 0.8342150328890259
Model:  BayesianRidge    Accuracy: 0.8021059335305849
Model:  ARDRegression    Accuracy: 0.8370301267107425
Model:  PassiveAggressiveRegressor    Accuracy: 0.7586666582375814
Model:  TheilSenRegressor    Accuracy: 0.8001529621360151
Stock:  HP
Model:  LinearRegression    Accuracy: 0.8556179733843082
Model:  SupportVectorRegression    Accuracy: 0.8730289245378937
Model:  MLPRegressor    Accuracy: 0.8871642641957521
Model:  SGDRegressor    Accuracy: 0.8523232230997063
Model:  BayesianRidge    Accuracy: 0.8544699535179128
Model:  ARDRegression    Accuracy: 0.8624011787134958
Model:  PassiveAggressiveRegressor    Accuracy: 0.8544152376667216
Model:  TheilSenRegressor    Accuracy: 0.8679650149758233
In [17]:
# Score table: one row per model, one column per stock.
model_names = [name for _, name in model_list]
# dtype=float gives a numeric frame from the start. Without it the empty frame
# is object-typed, so numeric reductions work on Python objects and the
# display shows ragged precision.
df = pd.DataFrame(columns=stock_list, index=model_names, dtype=float)
for model_name, ticker, score in model_results:
    df.at[model_name, ticker] = score
df
Out[17]:
AAPL IBM MSFT WMT AMZN TSLA HP
LinearRegression 0.941669 0.969391 0.919376 0.978235 0.925083 0.826263 0.855618
SupportVectorRegression 0.9391 0.960564 0.907544 0.970139 0.81208 0.497542 0.873029
MLPRegressor 0.958443 0.972413 0.931382 0.981007 0.930511 0.37879 0.887164
SGDRegressor 0.940575 0.969885 0.916872 0.978806 0.917466 0.834215 0.852323
BayesianRidge 0.928406 0.968267 0.916655 0.97783 0.919986 0.802106 0.85447
ARDRegression 0.944004 0.967241 0.916188 0.978049 0.918824 0.83703 0.862401
PassiveAggressiveRegressor 0.925993 0.966787 0.826294 0.970126 0.920354 0.758667 0.854415
TheilSenRegressor 0.93814 0.970953 0.917094 0.978151 0.923055 0.800153 0.867965
In [18]:
# For every stock column: the model with the highest score and that score.
highest = [
    [ticker, df[ticker].astype(float).idxmax(), df[ticker].max()]
    for ticker in df.columns
]
df_high = pd.DataFrame(highest, columns=["Stock","Model","Accuracy"])
df_high
Out[18]:
Stock Model Accuracy
0 AAPL MLPRegressor 0.958443
1 IBM MLPRegressor 0.972413
2 MSFT MLPRegressor 0.931382
3 WMT MLPRegressor 0.981007
4 AMZN MLPRegressor 0.930511
5 TSLA ARDRegression 0.837030
6 HP MLPRegressor 0.887164
In [19]:
# Row-wise mean: each model's score averaged over all stock columns.
average = df.mean(axis=1)
In [20]:
# Rank the models from highest to lowest mean score.
average.sort_values(ascending=False)
Out[20]:
ARDRegression                 0.917677
LinearRegression              0.916519
SGDRegressor                  0.915735
TheilSenRegressor             0.913645
BayesianRidge                 0.909674
PassiveAggressiveRegressor    0.888948
MLPRegressor                  0.862816
SupportVectorRegression       0.851428
dtype: float64
In [21]:
# One chart per stock: the actual close plus every model's predictions.
for stock, stock_df in stock_dfs:
    # update_layout returns the figure, so the call can be chained.
    fig = px.line(stock_df).update_layout(
        title=stock,
        xaxis_title='Time',
        yaxis_title='Price',
    )
    fig.show()